The scope of this project is to make predictions of the hourly "Temperature" based on the prior 24 hours of data from the weather-conditions dataset recorded at the weather station of the Max Planck Institute for Biogeochemistry in Jena, Germany, over several years. The dataset is structured at an hourly frequency from 01-01-2009 to 01-01-2017. This predictive model is built in an IPython environment using the Recurrent Neural Network libraries Keras and TensorFlow, with LSTM layers.
import numpy as np
import pandas as pd
import keras
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
# Load the hourly Jena climate data and take a quick first look at it.
df = pd.read_csv("./data/climate_hour.csv")
print(df.shape)
display(df.head(5))
print("Time stamp range is %s - %s" % (df["Date Time"].iloc[0], df["Date Time"].iloc[-1]))

# EDA copy of the frame, re-indexed by a real DatetimeIndex.
# Raw timestamps use "_" between date and time, so swap it for a space first.
edaDf = df.copy()
date_time = list(pd.to_datetime([stamp.replace("_", " ") for stamp in edaDf["Date Time"]]))
edaDf["Date Time"] = date_time
edaDf.set_index("Date Time", inplace=True)
edaDf.head(5)
edaDf.isna().sum()
# Bar chart of the mean temperature for each hour of the day.
corrDf = df.loc[:][["Date Time", "T (degC)"]]
# Slice the two-digit hour ("HH") out of the "DD.MM.YYYY_HH:MM:SS" stamp.
hour_of_day = [stamp[-8:-6] for stamp in corrDf["Date Time"]]
corrDf["Date Time"] = hour_of_day
corrDf = pd.DataFrame(corrDf.groupby("Date Time")["T (degC)"].mean())
x_labels = corrDf.index
y_pos = np.arange(len(x_labels))
plt.figure(figsize=(15, 5))
plt.bar(y_pos, corrDf["T (degC)"], align='center', alpha=0.5)
plt.xticks(y_pos, x_labels)
plt.ylabel('Mean Deg C')
plt.xlabel("Hours")
plt.title('Mean Deg C in 24 hours ')
plt.show()
# Bootstrap confidence interval for the mean temperature at hour "14".
corrDf = df.loc[:][["Date Time", "T (degC)"]]
# Slice the two-digit hour ("HH") out of the "DD.MM.YYYY_HH:MM:SS" stamp.
corrDf["Date Time"] = [item[-8:-6] for item in corrDf["Date Time"]]
tempLst = np.array(corrDf["T (degC)"][corrDf["Date Time"] == "14"])
print(np.mean(tempLst))
t_stat = 1.96  # two-sided 95% critical value (normal approximation)
# One bootstrap trial per observation: resample with replacement, record the mean.
bLst = [np.mean(np.random.choice(tempLst, replace=True, size=len(tempLst)))
        for _ in range(len(tempLst))]
print("Number of Samples per Trail (Degree of Freedom:):", len(tempLst))
print("t-statistic (from table):", t_stat)
print("Mean of Bootstrapped Samples:", np.mean(bLst))
print("Std of Bootstrapped Samples:", np.std(bLst))
# BUG FIX: the lower bound previously used a hardcoded 1.964 while the upper
# used 1.96; both bounds now use the declared t_stat.
half_width = t_stat * np.std(bLst) / np.sqrt(len(bLst))
print("Confidence Interval is %0.3f - %0.3f" % (np.mean(bLst) - half_width, np.mean(bLst) + half_width))
# Bootstrap confidence interval for the mean temperature at hour "15".
corrDf = df.loc[:][["Date Time", "T (degC)"]]
# Slice the two-digit hour ("HH") out of the "DD.MM.YYYY_HH:MM:SS" stamp.
corrDf["Date Time"] = [item[-8:-6] for item in corrDf["Date Time"]]
tempLst = np.array(corrDf["T (degC)"][corrDf["Date Time"] == "15"])
print(np.mean(tempLst))
t_stat = 1.96  # two-sided 95% critical value (normal approximation)
# One bootstrap trial per observation: resample with replacement, record the mean.
bLst = [np.mean(np.random.choice(tempLst, replace=True, size=len(tempLst)))
        for _ in range(len(tempLst))]
print("Number of Samples per Trail (Degree of Freedom:):", len(tempLst))
print("t-statistic (from table):", t_stat)
print("Mean of Bootstrapped Samples:", np.mean(bLst))
print("Std of Bootstrapped Samples:", np.std(bLst))
# BUG FIX: the lower bound previously used a hardcoded 1.964 while the upper
# used 1.96; both bounds now use the declared t_stat.
half_width = t_stat * np.std(bLst) / np.sqrt(len(bLst))
print("Confidence Interval is %0.3f - %0.3f" % (np.mean(bLst) - half_width, np.mean(bLst) + half_width))
# Bootstrap confidence interval for the mean temperature at hour "16".
corrDf = df.loc[:][["Date Time", "T (degC)"]]
# Slice the two-digit hour ("HH") out of the "DD.MM.YYYY_HH:MM:SS" stamp.
corrDf["Date Time"] = [item[-8:-6] for item in corrDf["Date Time"]]
tempLst = np.array(corrDf["T (degC)"][corrDf["Date Time"] == "16"])
print(np.mean(tempLst))
t_stat = 1.96  # two-sided 95% critical value (normal approximation)
# One bootstrap trial per observation: resample with replacement, record the mean.
bLst = [np.mean(np.random.choice(tempLst, replace=True, size=len(tempLst)))
        for _ in range(len(tempLst))]
print("Number of Samples per Trail (Degree of Freedom:):", len(tempLst))
print("t-statistic (from table):", t_stat)
print("Mean of Bootstrapped Samples:", np.mean(bLst))
print("Std of Bootstrapped Samples:", np.std(bLst))
# BUG FIX: the lower bound previously used a hardcoded 1.964 while the upper
# used 1.96; both bounds now use the declared t_stat.
half_width = t_stat * np.std(bLst) / np.sqrt(len(bLst))
print("Confidence Interval is %0.3f - %0.3f" % (np.mean(bLst) - half_width, np.mean(bLst) + half_width))
# Bootstrap confidence interval for the mean temperature at hour "04".
corrDf = df.loc[:][["Date Time", "T (degC)"]]
# Slice the two-digit hour ("HH") out of the "DD.MM.YYYY_HH:MM:SS" stamp.
corrDf["Date Time"] = [item[-8:-6] for item in corrDf["Date Time"]]
tempLst = np.array(corrDf["T (degC)"][corrDf["Date Time"] == "04"])
print(np.mean(tempLst))
t_stat = 1.96  # two-sided 95% critical value (normal approximation)
# One bootstrap trial per observation: resample with replacement, record the mean.
bLst = [np.mean(np.random.choice(tempLst, replace=True, size=len(tempLst)))
        for _ in range(len(tempLst))]
print("Number of Samples per Trail (Degree of Freedom:):", len(tempLst))
print("t-statistic (from table):", t_stat)
print("Mean of Bootstrapped Samples:", np.mean(bLst))
print("Std of Bootstrapped Samples:", np.std(bLst))
# BUG FIX: the lower bound previously used a hardcoded 1.964 while the upper
# used 1.96; both bounds now use the declared t_stat.
half_width = t_stat * np.std(bLst) / np.sqrt(len(bLst))
print("Confidence Interval is %0.3f - %0.3f" % (np.mean(bLst) - half_width, np.mean(bLst) + half_width))
# Bootstrap confidence interval for the mean temperature at hour "05".
corrDf = df.loc[:][["Date Time", "T (degC)"]]
# Slice the two-digit hour ("HH") out of the "DD.MM.YYYY_HH:MM:SS" stamp.
corrDf["Date Time"] = [item[-8:-6] for item in corrDf["Date Time"]]
tempLst = np.array(corrDf["T (degC)"][corrDf["Date Time"] == "05"])
print(np.mean(tempLst))
t_stat = 1.96  # two-sided 95% critical value (normal approximation)
# One bootstrap trial per observation: resample with replacement, record the mean.
bLst = [np.mean(np.random.choice(tempLst, replace=True, size=len(tempLst)))
        for _ in range(len(tempLst))]
print("Number of Samples per Trail (Degree of Freedom:):", len(tempLst))
print("t-statistic (from table):", t_stat)
print("Mean of Bootstrapped Samples:", np.mean(bLst))
print("Std of Bootstrapped Samples:", np.std(bLst))
# BUG FIX: the lower bound previously used a hardcoded 1.964 while the upper
# used 1.96; both bounds now use the declared t_stat.
half_width = t_stat * np.std(bLst) / np.sqrt(len(bLst))
print("Confidence Interval is %0.3f - %0.3f" % (np.mean(bLst) - half_width, np.mean(bLst) + half_width))
# Bootstrap confidence interval for the mean temperature at hour "06".
corrDf = df.loc[:][["Date Time", "T (degC)"]]
# Slice the two-digit hour ("HH") out of the "DD.MM.YYYY_HH:MM:SS" stamp.
corrDf["Date Time"] = [item[-8:-6] for item in corrDf["Date Time"]]
tempLst = np.array(corrDf["T (degC)"][corrDf["Date Time"] == "06"])
print(np.mean(tempLst))
t_stat = 1.96  # two-sided 95% critical value (normal approximation)
# One bootstrap trial per observation: resample with replacement, record the mean.
bLst = [np.mean(np.random.choice(tempLst, replace=True, size=len(tempLst)))
        for _ in range(len(tempLst))]
print("Number of Samples per Trail (Degree of Freedom:):", len(tempLst))
print("t-statistic (from table):", t_stat)
print("Mean of Bootstrapped Samples:", np.mean(bLst))
print("Std of Bootstrapped Samples:", np.std(bLst))
# BUG FIX: the lower bound previously used a hardcoded 1.964 while the upper
# used 1.96; both bounds now use the declared t_stat.
half_width = t_stat * np.std(bLst) / np.sqrt(len(bLst))
print("Confidence Interval is %0.3f - %0.3f" % (np.mean(bLst) - half_width, np.mean(bLst) + half_width))
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Annotated heat map of the pairwise correlations between all sensor channels.
plt.figure(figsize=(10,10))
ax=sns.heatmap(edaDf.corr(), annot=True)
# One full-width time-series panel per sensor channel, followed by summary stats.
for panel_idx, column in enumerate(edaDf.columns):
    plt.figure(figsize=(50, 100))
    plt.subplot(14, 1, panel_idx + 1)
    edaDf[column].plot(fontsize=20)
    plt.ylabel(column, fontsize=35)
plt.show()
display(edaDf.describe())
# Horizontal box plot per channel to eyeball outliers; palette cycles Set1-Set3.
for panel_idx, column in enumerate(edaDf.columns):
    plt.figure(figsize=(50, 100))
    plt.subplot(14, 1, panel_idx + 1)
    sns.boxplot(x=column, data=edaDf, orient="h", palette="Set" + str(panel_idx % 3 + 1))
    plt.ylabel(column, fontsize=35)
    plt.tick_params(labelsize=30)
plt.show()
# Bollinger-style band plot for the temperature series.
bol_df = edaDf.copy()
# Lower band: 24-h rolling mean MINUS 3x rolling std.
# NOTE(review): the std uses a 96-h window while the mean uses 24 h, and the
# title below says "24 Hrs windows" — confirm the 96 is intentional.
bol_low_df = edaDf.rolling(24).mean() - (edaDf.rolling(96).std()*3) #Calculate rolling mean - 3x rolling std
# Upper band: 24-h rolling mean PLUS 3x rolling std (same window caveat).
bol_high_df = edaDf.rolling(24).mean() + (edaDf.rolling(96).std()*3) #Calculate rolling mean + 3x rolling std
plt.figure(figsize=(20,10))
#Plot upper-bound, lower-bound and actual T(degC) values
bol_high_df["T (degC)"].plot(color="blue")
bol_low_df["T (degC)"].plot(color="green")
bol_df["T (degC)"].plot(color="red")
plt.legend(("Deg C high bound","Deg C low bound", "Deg C"), fontsize=15)
plt.xticks(fontsize=18)
plt.yticks(fontsize=14)
plt.title("Bollinger Plot with (24 Hrs windows)", fontsize=20)
plt.xlabel("Date Time", fontsize=20)
plt.ylabel("T (degC)", fontsize=16)
plt.show()
# Reload the raw data and normalise timestamps to "DD.MM.YYYY_HH:MM:SS".
df = pd.read_csv("./data/climate_hour.csv")
df["Date Time"] = df["Date Time"].map(lambda stamp: str(stamp).replace(" ", "_"))
df.head(3)

# Training split: everything up to 2014-12-31 22:00 inclusive.
x_train_last_index = df.index[df["Date Time"] == "31.12.2014_22:00:00"][0]
x_train = df.loc[:x_train_last_index]
display(x_train.head(3))
display(x_train.tail(3))

# Test split: from 2014-12-31 00:00 onward, dropping the very last row.
x_test_start_index = df.index[df["Date Time"] == "31.12.2014_00:00:00"][0]
x_test = df.loc[x_test_start_index:]
x_test = x_test.iloc[:-1]
display(x_test.head(3))
display(x_test.tail(3))
timestep = 24  # hours of history that make up one sample

def _to_windows(arr, timestep=24):
    """Flatten every run of `timestep` consecutive rows into one sample row.

    Input shape (rows, features) -> output (rows - timestep + 1, timestep * features).
    Replaces the previous copy-pasted append loops for train and test.
    """
    width = timestep * arr.shape[1]
    return np.array([arr[i:i + timestep].reshape(width)
                     for i in range(arr.shape[0] - timestep + 1)])

# Drop the "Date Time" column; everything else is a numeric feature.
arr_train = _to_windows(np.array(x_train.iloc[:, 1:]), timestep)
print(arr_train.shape)
display(arr_train[:3])
display(arr_train[-3:])
arr_test = _to_windows(np.array(x_test.iloc[:, 1:]), timestep)
print(arr_test.shape)
display(arr_test[:3])
display(arr_test[-3:])
timestep=24
# Training targets: the temperature one step after each 24-hour window.
# Window i covers df rows i .. i+timestep-1, so its label is row i+timestep;
# labels are sliced from the full frame, which lets the final training label
# be the 23:00 reading that immediately follows the 22:00 train cut-off.
y_train = np.array(df.iloc[timestep: timestep + arr_train.shape[0]]["T (degC)"])
#y_train = y_train.reshape(y_train.shape[0],1)
display(y_train.shape)
display(y_train[:3])
display(y_train[-3:])
# Test targets: start one full window after the first test row. Unlike
# y_train (a plain ndarray), y_test stays a pandas Series with the df index.
x_test_start_index =df.index[df["Date Time"] == "31.12.2014_00:00:00"][0]
y_test= df.loc[x_test_start_index+timestep:]["T (degC)"]
display(y_test.shape)
display(y_test[:3])
display(y_test[-3:])
# Min-max scale every sensor column to [0, 1]; "Date Time" (column 0) is untouched.
df2 = df.copy()
feature_cols = df2.columns[1:]
# Vectorised normalisation (was a per-column Python loop with an unused
# enumerate index); pandas min/max skip NaN exactly like np.min/np.max did.
df2[feature_cols] = (df2[feature_cols] - df2[feature_cols].min()) / (df2[feature_cols].max() - df2[feature_cols].min())
print(df2.shape)
df2.head(3)
# Re-create the train/test split on the normalised frame (same cut-offs as before).
x_train_last_index = df2.index[df2["Date Time"] == "31.12.2014_22:00:00"][0]
x_train = df2.loc[:x_train_last_index]
x_test_start_index = df2.index[df2["Date Time"] == "31.12.2014_00:00:00"][0]
x_test = df2.loc[x_test_start_index:]
x_test = x_test.iloc[:-1]
timestep = 24  # hours of history that make up one sample

def _to_windows(arr, timestep=24):
    """Flatten every run of `timestep` consecutive rows into one sample row.

    Input shape (rows, features) -> output (rows - timestep + 1, timestep * features).
    Replaces the previous copy-pasted append loops for train and test.
    """
    width = timestep * arr.shape[1]
    return np.array([arr[i:i + timestep].reshape(width)
                     for i in range(arr.shape[0] - timestep + 1)])

# Drop the "Date Time" column; everything else is a normalised numeric feature.
arr_train = _to_windows(np.array(x_train.iloc[:, 1:]), timestep)
print(arr_train.shape)
display(arr_train[:3])
display(arr_train[-3:])
arr_test = _to_windows(np.array(x_test.iloc[:, 1:]), timestep)
print(arr_test.shape)
display(arr_test[:3])
display(arr_test[-3:])
timestep = 24  # number of timesteps used to predict the next value
# Derive the per-timestep feature count from the flattened window width
# instead of hard-coding 14, so this survives upstream column changes.
n_feature = arr_train.shape[1] // timestep
print("2D Train", arr_train.shape, y_train.shape)
print("2D Test", arr_test.shape, y_test.shape)
print()
# Reshape flat windows into (samples, timesteps, features) as LSTM layers expect.
arr_train = np.reshape(arr_train, (arr_train.shape[0], timestep, n_feature))
arr_test = np.reshape(arr_test, (arr_test.shape[0], timestep, n_feature))
print("3D Train", arr_train.shape, y_train.shape)
print("3D Test", arr_test.shape, y_test.shape)
import keras #Need to downgrade keras to 2.1.2 Version (pip install kera)
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Embedding
from keras.layers import LSTM
from keras.layers import BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint
# Stacked LSTM network: two 16-unit LSTM layers feeding a small linear head
# that emits a single temperature value.
model = Sequential()
model.add(LSTM(16, activation="tanh", input_shape=(timestep, n_feature), return_sequences=True))
model.add(LSTM(16))
model.add(Dense(8, kernel_initializer="normal", activation="linear"))
model.add(Dense(1, kernel_initializer="normal", activation="linear"))

# Keep only the checkpoint with the lowest validation loss on disk.
checkpoint = ModelCheckpoint(filepath="./model/best_mod.h5", monitor='val_loss',
                             verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

# Mean-absolute-error objective, optimised with Adam.
model.compile(loss="mae", optimizer="adam")
batch_size = 200
no_epoch = 1000

# Fit the network, checkpointing the best model on validation loss as we go.
history = model.fit(arr_train, y_train, epochs=no_epoch, batch_size=batch_size,
                    validation_data=(arr_test, y_test), callbacks=callbacks_list)

# Persist the final model and the per-epoch loss history.
model.save("./model/model_EP%s_b.h5" % (no_epoch))
epochs = np.arange(1, no_epoch + 1, dtype=int)
history_df = pd.DataFrame(list(zip(epochs, history.history["loss"], history.history["val_loss"])),
                          columns=["epoch", "loss", "val_loss"])
history_df.to_csv("./model/history_EP%s" % (str(no_epoch)))
pd.read_csv("Result_Table.csv").replace("NaN", " ")
# Compare training histories for three hidden-layer sizes of the base model.
hist1 = pd.read_csv("./model_base_BS200_EP500_node8/history_EP500")
hist2 = pd.read_csv("./model_base_BS200_EP500_node14/history_EP500")
hist3 = pd.read_csv("./model_base_BS200_EP500_node32/history_EP500")
plt.figure(figsize=(20, 7))
plt.suptitle("LSTM(act)= tanh, Dense(act)=default,Batch_size=200, Epoch=500", fontsize=25)
# One loss box plot per node count; only the left-most panel carries the y label.
for pos, hist, ylim, title in [(1, hist1, (0.425, 0.525), "node = 8"),
                               (2, hist2, (0.425, 0.525), "node = 14"),
                               (3, hist3, (0.35, 0.525), "node = 32")]:
    plt.subplot(1, 3, pos)
    plt.boxplot(x=[hist.loss, hist.val_loss], widths=(0.5, 0.5), labels=["train_loss", "val_loss"])
    plt.ylim(*ylim)
    if pos == 1:
        plt.ylabel("Loss", fontsize=18)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=14)
    plt.title(title, fontsize=20)
# Overlay all six curves on one set of axes for a direct comparison.
histAll = pd.concat((hist1.epoch,
                     hist1.loss, hist1.val_loss,
                     hist2.loss, hist2.val_loss,
                     hist3.loss, hist3.val_loss), axis=1)
histAll.columns = ["epoch", "node8_loss", "node8_val_loss", "node14_loss", "node14_val_loss", "node32_loss", "node32_val_loss"]
histAll.plot(x="epoch", figsize=(20, 5))
plt.ylim(0.35, 0.7)
plt.ylabel("Loss", fontsize=18)
plt.xlabel("Epoch", fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=14)
plt.legend(fontsize=12)
plt.show()
# Compare training histories for two node counts with 0.5 dropout.
hist1 = pd.read_csv("./model_base_BS200_EP300_node14_Drop0.5/history_EP300")
hist2 = pd.read_csv("./model_base_BS200_EP300_node32_Drop0.5/history_EP300")
plt.figure(figsize=(20, 7))
plt.suptitle("LSTM(act)= tanh, Dense(act)=default,Batch_size=200, Epoch=300, dropout=0.5", fontsize=25)
# Side-by-side loss box plots; only the left panel carries the y label.
for pos, hist, ylim, title in [(1, hist1, (0, 6), "node = 14"),
                               (2, hist2, (0.3, 1.6), "node = 32")]:
    plt.subplot(1, 2, pos)
    plt.boxplot(x=[hist.loss, hist.val_loss], widths=(0.5, 0.5), labels=["train_loss", "val_loss"])
    plt.ylim(*ylim)
    if pos == 1:
        plt.ylabel("Loss", fontsize=18)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=14)
    plt.title(title, fontsize=20)
# Overlay all four curves on one set of axes.
histAll = pd.concat((hist1.epoch, hist1.loss, hist1.val_loss, hist2.loss, hist2.val_loss), axis=1)
histAll.columns = ["epoch", "node14_loss", "node14_val_loss", "node32_loss", "node32_val_loss"]
histAll.plot(x="epoch", figsize=(20, 5))
plt.ylim(0.3, 6)
plt.ylabel("Loss", fontsize=18)
plt.xlabel("Epoch", fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=14)
plt.legend(fontsize=12)
plt.show()
# Compare node counts for the tanh-LSTM / linear-Dense variant.
hist1 = pd.read_csv("./model_BS200_node8_tanh_linear/history_EP500")
hist2 = pd.read_csv("./model_BS200_node16_tanh_linear/history_EP500")
hist3 = pd.read_csv("./model_BS200_node32_tanh_linear/history_EP300")
plt.figure(figsize=(20, 7))
plt.suptitle("LSTM(act)= tanh, Dense(act)=linear,Batch_size=200, Epoch=500, Dense(kernel_initializer=normal)", fontsize=25)
# One loss box plot per node count; only the left-most panel carries the y label.
for pos, hist, ylim, title in [(1, hist1, (0.425, 0.525), "node = 8"),
                               (2, hist2, (0.425, 0.525), "node = 16"),
                               (3, hist3, (6.6, 7), "node = 32")]:
    plt.subplot(1, 3, pos)
    plt.boxplot(x=[hist.loss, hist.val_loss], widths=(0.5, 0.5), labels=["train_loss", "val_loss"])
    plt.ylim(*ylim)
    if pos == 1:
        plt.ylabel("Loss", fontsize=18)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=14)
    plt.title(title, fontsize=20)
# Overlay all six curves on one set of axes.
histAll = pd.concat((hist1.epoch,
                     hist1.loss, hist1.val_loss,
                     hist2.loss, hist2.val_loss,
                     hist3.loss, hist3.val_loss), axis=1)
histAll.columns = ["epoch", "node8_loss", "node8_val_loss",
                   "node16_loss", "node16_val_loss",
                   "node32_loss", "node32_val_loss"]
histAll.plot(x="epoch", figsize=(20, 5))
plt.ylabel("Loss", fontsize=18)
plt.xlabel("Epoch", fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=14)
plt.legend(fontsize=12)
plt.show()
# Compare batch sizes 200/100/50 for the 16-node tanh/linear variant.
hist1 = pd.read_csv("./model_BS200_node8_tanh_linear/history_EP500")
hist2 = pd.read_csv("./model_BS100_EP300_node16_tanh_linear/history_EP300")
hist3 = pd.read_csv("./model_BS50_EP300_node16_tanh_linear/history_EP400")
plt.figure(figsize=(20, 7))
plt.suptitle("LSTM(act)= tanh, Dense(act)=linear, nodes=16 , Epoch=300, Dense(kernel_initializer=normal)", fontsize=25)
# One loss box plot per batch size; only the left-most panel carries the y label.
for pos, hist, title in [(1, hist1, "Batch size = 200"),
                         (2, hist2, "Batch size = 100"),
                         (3, hist3, "Batch size = 50")]:
    plt.subplot(1, 3, pos)
    plt.boxplot(x=[hist.loss, hist.val_loss], widths=(0.5, 0.5), labels=["train_loss", "val_loss"])
    plt.ylim(0.425, 0.525)
    if pos == 1:
        plt.ylabel("Loss", fontsize=18)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=14)
    plt.title(title, fontsize=20)
# Overlay all six curves on one set of axes.
# NOTE(review): the "nodeXX" legend labels look copy-pasted from the node-count
# comparison — this plot varies batch size, not node count. Confirm intent.
histAll = pd.concat((hist1.epoch,
                     hist1.loss, hist1.val_loss,
                     hist2.loss, hist2.val_loss,
                     hist3.loss, hist3.val_loss), axis=1)
histAll.columns = ["epoch", "node8_loss", "node8_val_loss",
                   "node16_loss", "node16_val_loss",
                   "node32_loss", "node32_val_loss"]
histAll.plot(x="epoch", figsize=(20, 5))
plt.ylim(0.41, 0.7)
plt.ylabel("Loss", fontsize=18)
plt.xlabel("Epoch", fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=14)
plt.legend(fontsize=12)
plt.show()
# History of the final long run (BS=200, EP=1000, 16 nodes, tanh/linear).
hist1 = pd.read_csv("./model_BS200_EP1000_node16_tanh_linear/history_EP1000")
plt.figure(figsize=(20, 7))
plt.suptitle("LSTM(act)= tanh, Dense(act)=linear, nodes=16 , Epoch=300, Dense(kernel_initializer=normal)", fontsize=25)
# Box plot of the loss distributions over all epochs.
plt.subplot(1, 2, 1)
plt.boxplot(x=[hist1.loss, hist1.val_loss], widths=(0.5, 0.5), labels=["train_loss", "val_loss"])
plt.ylim(0.42, 0.5)
plt.ylabel("Loss", fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=14)
plt.title("Batch size = 200", fontsize=20)
# Epoch-by-epoch learning curves for the same run.
hist1.plot(x="epoch", y=["loss", "val_loss"], figsize=(20, 5))
plt.ylim(0.41, 0.7)
plt.ylabel("Loss", fontsize=18)
plt.xlabel("Epoch", fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=14)
plt.legend(fontsize=12)
plt.show()
# Reload the best checkpoint and score it on both splits.
batch_size = 10
model = keras.models.load_model('./model_BS200_EP1000_node16_tanh_linear/best_mod.h5')
eval_batch = batch_size * 2  # effective evaluation batch size of 20
train_score = model.evaluate(arr_train, y_train, batch_size=eval_batch, verbose=1)
test_score = model.evaluate(arr_test, y_test, batch_size=eval_batch, verbose=1)
print("Train score:%0.3f, Test Score:%0.3f" % (train_score, test_score))
pred_temp = model.predict(arr_test)
model.summary()
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
#print(pred_temp.shape)
pred_temp = pred_temp.flatten()
#x_test_start_index =df2.index[df2["Date Time"] == "31.12.2014_00:00:00"][0]
#date_time= df2.loc[x_test_start_index+timestep:]["Date Time"]
x_test_start_index =df2.index[df2["Date Time"] == "31.12.2014_00:00:00"][0]
date_time= df2.loc[x_test_start_index+timestep:]["Date Time"]
date_time=list(pd.to_datetime(list(map(lambda x:x.replace("_"," "), date_time))))
df=pd.DataFrame(list(zip(date_time,y_test,pred_temp)), columns=["date_time", "y_test","pred_temp"])
df.set_index("date_time", inplace=True)
#plt.figure(figsize=(20,20))
df.plot(figsize=(20,10), fontsize=15)
plt.xlabel("Date Time", fontsize=18)
plt.ylabel("Deg C", fontsize=18)
plt.legend(fontsize=18)
plt.title("Temperature Plot (Actual vs Predicted)", fontsize = 22)
plt.show()
# Build the submission file: one row per test timestamp with its prediction.
pred_temp = pred_temp.flatten()
x_test_start_index = df2.index[df2["Date Time"] == "31.12.2014_00:00:00"][0]
date_time = df2.loc[x_test_start_index + timestep:]["Date Time"]
pred_df = pd.DataFrame(list(zip(date_time, pred_temp)), columns=["date_time", "temperature"])
pred_df.to_csv("./submission/submit3.csv", index=False)
print("Done!!!")